This example demonstrates how to convert a network from Caffe's Model Zoo for use with Lasagne. We will use the Network in Network (NIN) model trained on CIFAR-10.
We will create a set of Lasagne layers corresponding to the Caffe model specification (prototxt), then copy the parameters from the caffemodel file into our model.
If you just want to try the final result, you can download the pickled weights here.
In [1]:
!wget https://www.dropbox.com/s/blrajqirr1p31v0/cifar10_nin.caffemodel
!wget https://gist.githubusercontent.com/ebenolson/91e2cfa51fdb58782c26/raw/b015b7403d87b21c6d2e00b7ec4c0880bbeb1f7e/model.prototxt
In [2]:
import caffe
In [3]:
net_caffe = caffe.Net('model.prototxt', 'cifar10_nin.caffemodel', caffe.TEST)
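Before building the Lasagne counterpart, it can help to peek at the parameter shapes Caffe loaded. A minimal optional sketch, using pycaffe's net_caffe.params mapping (layer name → list of blobs):

# Optional: list each parameterized Caffe layer and its blob shapes.
# Convolution filters are stored as (num_filters, channels, height, width).
for name, blobs in net_caffe.params.items():
    print(name, [blob.data.shape for blob in blobs])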
In [4]:
import lasagne
from lasagne.layers import InputLayer, DropoutLayer, FlattenLayer
from lasagne.layers.dnn import Conv2DDNNLayer as ConvLayer
from lasagne.layers import MaxPool2DLayer as PoolLayer
from lasagne.utils import floatX
In [5]:
import theano
from theano.tensor.signal import downsample
# We need a recent theano version for this to work
assert theano.__version__ >= '0.7.0.dev-512c2c16ac1c7b91d2db3849d8e7f384b524d23b'
class AveragePool2DLayer(lasagne.layers.MaxPool2DLayer):
    # Average pooling layer: reuses MaxPool2DLayer's shape handling but
    # pools with mode='average_exc_pad' (padding is excluded from the average).
    def get_output_for(self, input, **kwargs):
        pooled = downsample.max_pool_2d(input,
                                        ds=self.pool_size,
                                        st=self.stride,
                                        ignore_border=self.ignore_border,
                                        padding=self.pad,
                                        mode='average_exc_pad',
                                        )
        return pooled
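As a quick sanity check of the new layer, we can pool a tiny hand-made array and compare against block means computed by hand (the 4x4 example below is made up purely for this check):

# Optional check: a 2x2 average pool with stride 2 over a 4x4 input
# should return the mean of each 2x2 block.
import numpy as np

x = floatX(np.arange(16).reshape(1, 1, 4, 4))
l_in = InputLayer((1, 1, 4, 4))
l_avg = AveragePool2DLayer(l_in, pool_size=2, stride=2)
print(lasagne.layers.get_output(l_avg, x).eval()[0, 0])
# expected: [[ 2.5  4.5]
#            [10.5 12.5]]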
In [6]:
net = {}
net['input'] = InputLayer((None, 3, 32, 32))
net['conv1'] = ConvLayer(net['input'], num_filters=192, filter_size=5, pad=2)
net['cccp1'] = ConvLayer(net['conv1'], num_filters=160, filter_size=1)
net['cccp2'] = ConvLayer(net['cccp1'], num_filters=96, filter_size=1)
net['pool1'] = PoolLayer(net['cccp2'], pool_size=3, stride=2)
net['drop3'] = DropoutLayer(net['pool1'], p=0.5)
net['conv2'] = ConvLayer(net['drop3'], num_filters=192, filter_size=5, pad=2)
net['cccp3'] = ConvLayer(net['conv2'], num_filters=192, filter_size=1)
net['cccp4'] = ConvLayer(net['cccp3'], num_filters=192, filter_size=1)
net['pool2'] = AveragePool2DLayer(net['cccp4'], pool_size=3, stride=2)
net['drop6'] = DropoutLayer(net['pool2'], p=0.5)
net['conv3'] = ConvLayer(net['drop6'], num_filters=192, filter_size=3, pad=1)
net['cccp5'] = ConvLayer(net['conv3'], num_filters=192, filter_size=1)
net['cccp6'] = ConvLayer(net['cccp5'], num_filters=10, filter_size=1)
net['pool3'] = AveragePool2DLayer(net['cccp6'], pool_size=8)
net['output'] = lasagne.layers.FlattenLayer(net['pool3'])
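To confirm this definition lines up with the prototxt, we can print each layer's inferred output shape; the list of names below is just the construction order written out by hand:

# Optional: print every layer's output shape in construction order.
for name in ['input', 'conv1', 'cccp1', 'cccp2', 'pool1', 'drop3',
             'conv2', 'cccp3', 'cccp4', 'pool2', 'drop6',
             'conv3', 'cccp5', 'cccp6', 'pool3', 'output']:
    print(name, lasagne.layers.get_output_shape(net[name]))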
In [7]:
layers_caffe = dict(zip(list(net_caffe._layer_names), net_caffe.layers))
for name, layer in net.items():
    try:
        # Copy weights and biases from the Caffe layer with the same name.
        layer.W.set_value(layers_caffe[name].blobs[0].data)
        layer.b.set_value(layers_caffe[name].blobs[1].data)
    except AttributeError:
        # Layers without parameters (input, pooling, dropout, flatten) are skipped.
        continue
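Since the loop silently skips anything without W/b attributes, an optional follow-up check that every convolution really received its Caffe weights can be reassuring. This relies on Lasagne and Caffe both storing filters as (num_filters, channels, height, width), which is why no transposing was needed above:

# Optional: verify that each parameterized layer now matches its Caffe counterpart.
import numpy as np

for name, layer in net.items():
    if hasattr(layer, 'W'):
        ok = np.allclose(layer.W.get_value(), layers_caffe[name].blobs[0].data)
        print(name, layer.W.get_value().shape, 'copied' if ok else 'MISMATCH')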
In [8]:
import numpy as np
import pickle
import matplotlib.pyplot as plt
%matplotlib inline
In [9]:
!wget https://s3.amazonaws.com/lasagne/recipes/pretrained/cifar10/cifar10.npz
In [10]:
data = np.load('cifar10.npz')
In [11]:
prob = np.array(lasagne.layers.get_output(net['output'], floatX(data['whitened']), deterministic=True).eval())
predicted = np.argmax(prob, 1)
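get_output(...).eval() compiles the graph on the fly; if we wanted to classify more batches, compiling a Theano function once is cheaper. A minimal sketch of that alternative, reusing the same deterministic graph:

# Optional: build a reusable prediction function instead of calling .eval() each time.
import theano.tensor as T

input_var = T.tensor4('inputs')
output_expr = lasagne.layers.get_output(net['output'], input_var, deterministic=True)
predict_fn = theano.function([input_var], output_expr)
prob2 = predict_fn(floatX(data['whitened']))
print(np.allclose(prob, prob2))  # the two approaches should agree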
In [12]:
accuracy = np.mean(predicted == data['labels'])
print(accuracy)
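Overall accuracy can hide class-level differences, so an optional per-class breakdown (using the same labels and CLASSES arrays from the npz file) can be informative:

# Optional: accuracy broken down by class.
for k, cls in enumerate(data['CLASSES']):
    mask = data['labels'] == k
    print(cls, np.mean(predicted[mask] == k))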
In [13]:
net_caffe.blobs['data'].reshape(1000, 3, 32, 32)
net_caffe.blobs['data'].data[:] = data['whitened']
prob_caffe = net_caffe.forward()['pool3'][:,:,0,0]
In [14]:
np.allclose(prob, prob_caffe)
Out[14]:
In [15]:
def make_image(X):
    # Transpose the CHW image to HWC and rescale it to [0, 1] for display.
    im = np.swapaxes(X.T, 0, 1)
    im = im - im.min()
    im = im * 1.0 / im.max()
    return im

plt.figure(figsize=(16, 5))
for i in range(0, 10):
    plt.subplot(1, 10, i+1)
    plt.imshow(make_image(data['raw'][i]), interpolation='nearest')
    true = data['CLASSES'][data['labels'][i]]
    pred = data['CLASSES'][predicted[i]]
    color = 'green' if true == pred else 'red'
    plt.text(0, 0, true, color='black', bbox=dict(facecolor='white', alpha=1))
    plt.text(0, 32, pred, color=color, bbox=dict(facecolor='white', alpha=1))
    plt.axis('off')
In [16]:
import pickle
values = lasagne.layers.get_all_param_values(net['output'])
pickle.dump(values, open('model.pkl', 'wb'))
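The saved values can later be loaded back into a freshly constructed copy of the same network with lasagne.layers.set_all_param_values; a minimal sketch, assuming the model.pkl written above:

# Optional: restore the pickled parameters into the network.
values = pickle.load(open('model.pkl', 'rb'))
lasagne.layers.set_all_param_values(net['output'], values)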